| Purpose of analysis |
| Enhanced match data to investigate changes determined by switching from Centrally Held Market Extension product to the Location Plus product. |
ME Locations [ME Locations] (https://autotrader.eu.looker.com/explore/customer_performance/retailer_performance?qid=UOGyp76yoNhXo6N30w1V43&toggle=dat,fil,vis,vse) 
Currently locations are closely grouped together in the South East region of the UK.
VRM Example - Direct Match vs ME Match Current direct match does not properly measure market extension, a vrm could be seen at multiple locations if on the location plus product. The example below shows that this specific vrm should have a direct match as it was viewed at a different location (due to ME) to where it was sold at. ——————————————-
| GU15MXO |
DT10 |
10028897 |
LB62NKZ |
10027421 |
2021-07-01 |
0 |
| GU15MXO |
DT10 |
10028897 |
ML62AUR |
10027421 |
2021-07-01 |
0 |
| GU15MXO |
DT10 |
10028897 |
GU15MXO |
10027421 |
2021-07-01 |
1 |
| GU15MXO |
DT10 |
10028897 |
ML62AUR |
10027421 |
2021-07-01 |
0 |
| GU15MXO |
DT10 |
10028897 |
ML62AUR |
10027421 |
2021-07-01 |
0 |
VRM Example Map View - Direct Match vs ME Match The map view below details the specific vrm example: it’s buyers location, viewed retailer location and sold retailer locations ——————————————–
vrm_example_map <- vrm_example %>%
filter(me_vrm_match_flag == 1) %>%
select(sold_vrm, sold_did, sold_did_lat, sold_did_lon, sold_did_postcode, buyyer_postcode_district, buyer_lat, buyer_lon, viewed_vrm, viewed_did, viewed_retailer_postcode, viewed_retailer_postcode_1, viewed_retailer_postcode_2, viewed_distance_between)
worldmap = map_data('world')
ggplot() +
geom_polygon(data = worldmap,
aes(x = long, y = lat,
group = group),
fill = 'gray90',
color = 'black') +
coord_fixed(ratio = 1.3,
xlim = c(-5,1),
ylim = c(50, 55)) +
theme_void() +
geom_point(data = vrm_example_map, aes(x = sold_did_lon, y = sold_did_lat,col = "sold location")) +
geom_point(data = vrm_example_map, aes(x = viewed_retailer_postcode_2, y = viewed_retailer_postcode_1, col = "viewed location")) +
geom_point(data = vrm_example_map, aes(x = buyer_lon, y = buyer_lat, col = "buyer location")) + theme(legend.position="right")

| VRM Example Map View - Direct Match vs ME Match |
| The map view below details the specific vrm example: it’s buyers location, viewed retailer location and sold retailer locations. |
direct_vs_me_matches_agg <- enhanced_match %>%
group_by(sold_vrm, sold_month, sold_make, type_of_stock) %>%
mutate(value_added_products = ifelse(Value.added.Products >0, 1,0),
matched_viewed_distance_between = ifelse( (me_vrm_match_flag + site_all_match_flag + group_all_match_flag) > 0, viewed_distance_between, NA)
)%>%
summarise(row_count = n(),
me_vrm_match_flag = max(me_vrm_match_flag, na.rm = TRUE),
direct_match_flag = max(vrm_match_flag, na.rm = TRUE))
direct_vs_me_matches <- direct_vs_me_matches_agg %>%
group_by(sold_month) %>%
summarise(me_vrm_matches = sum(me_vrm_match_flag, na.rm= TRUE),
direct_matches = sum(direct_match_flag, na.rm=TRUE))
direct_vs_me_matches_m <- melt(direct_vs_me_matches[,c('sold_month', 'me_vrm_matches', 'direct_matches')],id.vars = 1)
ggplot(direct_vs_me_matches_m,aes(x = sold_month,y = value)) +
geom_bar(aes(fill = variable),stat = "identity",position = "stack") + theme_minimal() + scale_fill_brewer() + labs(title = "Direct vs ME Matches") + labs(x = expression("Month"), y = "Number of Matches")

# Create a vrm level summary of the events level data for analysis
vrm_agg <- enhanced_match %>%
group_by(sold_vrm, sold_month, sold_make, type_of_stock) %>%
mutate(value_added_products = ifelse(Value.added.Products >0, 1,0),
matched_viewed_distance_between = ifelse( (me_vrm_match_flag + site_all_match_flag + group_all_match_flag) > 0, viewed_distance_between, NA)
)%>%
summarise(row_count = n(),
me_vrm_match_flag = max(me_vrm_match_flag, na.rm = TRUE),
site_match_flag = max(site_all_match_flag, na.rm = TRUE),
group_match_flag = max(group_all_match_flag, na.rm = TRUE),
matched_viewed_distance_between = mean(matched_viewed_distance_between, na.rm=T),
profit_margin = mean(Chassis.Margin, na.rm=T),
value_added_products_flag = max(value_added_products)
) %>%
mutate(site_match_flag = ifelse(me_vrm_match_flag == 1 ,0, site_match_flag),
group_match_flag = ifelse( (site_match_flag == 1) | (me_vrm_match_flag == 1) ,0, group_match_flag),
no_match_flag = ifelse((me_vrm_match_flag + site_match_flag + group_match_flag) >0, 0, 1),
)
# replace NA with no match as this is a legit banding - if no match then ther is no distanc, so still want to include this in charts
vrm_agg$matched_viewed_distance_between[is.nan(vrm_agg$matched_viewed_distance_between)]<- -1
# create distance banding and turn into factor so shows in right order on charts
vrm_agg <- vrm_agg %>%
mutate(viewed_distance_banding = ifelse(matched_viewed_distance_between == -1, "No match",
ifelse(matched_viewed_distance_between <10, "less than 10 miles",
ifelse(matched_viewed_distance_between <20, "less than 20 miles",
ifelse(matched_viewed_distance_between <50, "less than 50 miles",
ifelse(matched_viewed_distance_between <100, "less than 100 miles",
ifelse(matched_viewed_distance_between >=100, "100 or more miles"
)))))))
# convert to factore with levels
vrm_agg$viewed_distance_banding <- factor(vrm_agg$viewed_distance_banding, levels = c("less than 10 miles", "less than 20 miles", "less than 50 miles", "less than 100 miles", "100 or more miles", 'No match'), ordered = TRUE)
# change back ***SKANKY, NEED TO FIND BETTER WASY TO DO THIS!!!! ***
vrm_agg$matched_viewed_distance_between[vrm_agg$matched_viewed_distance_between == -1]<- NaN
| How many sales did Auto Trader influence (split by the usual VRM, Site, Group level) |
| Sales increased in July 2021 for the DCC group as they moved onto the location plus product, but sales dropped down to previous months levels in August. |
matches <- vrm_agg %>%
group_by(sold_month) %>%
summarise(me_vrm_matches = sum(me_vrm_match_flag, na.rm= TRUE),
site_matches = sum(site_match_flag, na.rm=TRUE),
group_matches = sum(group_match_flag, na.rm = TRUE),
no_matches = sum(no_match_flag, na.rm = TRUE))
matches_m <- melt(matches[,c('sold_month', 'no_matches', 'group_matches','site_matches', 'me_vrm_matches')],id.vars = 1)
ggplot(matches_m,aes(x = sold_month,y = value)) +
geom_bar(aes(fill = variable),stat = "identity",position = "stack") + theme_minimal() + scale_fill_brewer() + labs(title = "Autotrader Influence") + labs(x = expression("Month"), y = "Influence")

| How far away is the buyer to the seller? |
| There does not look to be a large change in the distance between buyer and seller but this is likely due to the fact that DCC have locations grouped close together. |
# create distance aggregation to show how distance buyer travels changes over time
distance_matches <- vrm_agg %>%
filter(!is.na(viewed_distance_banding)) %>%
group_by(sold_month, viewed_distance_banding) %>%
summarise(me_vrm_matches = sum(me_vrm_match_flag, na.rm= TRUE),
site_matches = sum(site_match_flag, na.rm=TRUE),
group_matches = sum(group_match_flag, na.rm = TRUE),
no_matches = sum(no_match_flag, na.rm = TRUE))
# melt to in right format for plotting
distance_matches_m <- melt(distance_matches[,c('sold_month', 'viewed_distance_banding', 'me_vrm_matches', 'site_matches', 'group_matches', 'no_matches')],id.vars = c(1,2))
# create barplot
tmp <- distance_matches_m %>% group_by(sold_month) %>% summarise(monthly_total = sum(value, na.rm=T))
distance_matches_m_agg <- left_join(distance_matches_m, tmp, by = c('sold_month'), copy=T)
distance_matches_m_agg <- distance_matches_m_agg %>%
ungroup() %>%
group_by(sold_month, viewed_distance_banding) %>%
summarise(percent = sum(value, na.rm=T)/monthly_total)
ggplot(distance_matches_m_agg,aes(x = sold_month,y = percent)) +
geom_bar(aes(fill = viewed_distance_banding),stat = "identity",position = "fill") + theme_minimal() + scale_fill_brewer() + labs(title = "% of sales by distance banding") + labs(x = expression("Month"), y = "Influence") + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

#need to sort labelling out ....geom_text(aes(label=distance_matches_m_agg$percent, y=distance_matches_m_agg$percent/100)) +
#forcats::fct_rev
| Profit by Distance Sold - July 2021 Data |
| Enriched profit data for DCC was only available for July 2021, the data shows that average profit does not seem to fluctuate much when the buyer is further away from the retailer. Most sales for DCC came from less than 100 miles away from the site location. |
profit_agg <- vrm_agg %>%
filter(no_match_flag == 0) %>%
group_by(viewed_distance_banding) %>%
summarise(row_count = n(),
distinct_sales = n_distinct(sold_vrm),
avg_profit = mean(profit_margin, na.rm = TRUE))
#hist(vrm_agg$profit_margin, col = 'skyblue3', breaks = 20, xlab = 'profit margin', main = "Avg profit distribution")
par(mai=c(1,2,1,1))
barplot(profit_agg$avg_profit, names.arg = profit_agg$viewed_distance_banding, col="dodgerblue3",
density=c(30,10,20,35,15,25),angle=60, main = "Avg profit by distance", xlab = "Mean Profit(£)" , cex.names=0.7, horiz = TRUE, las=1)

par(mai=c(1,2,1,1))
barplot(profit_agg$distinct_sales, names.arg = profit_agg$viewed_distance_banding, col="dodgerblue3",
density=c(30,10,20,35,15,25),angle=60, main = "Amount of sales by distance banding", xlab = "Number of Sales" , cex.names=0.7, horiz = TRUE, las=1)

| What is the total value/profit of cars sold through Market Extension - product add ons |
| Although less sales are seen with finance add ons in July 2021 those that were sold with add ons were the most likely to come from 50 - 100 miles away from the retailer location. |
has_finance <- vrm_agg %>%
group_by(viewed_distance_banding) %>%
filter(value_added_products_flag == 1) %>%
summarise(distinct_sales = n_distinct(sold_vrm))
no_finance <- vrm_agg %>%
group_by(viewed_distance_banding) %>%
filter(value_added_products_flag == 0) %>%
summarise(distinct_sales = n_distinct(sold_vrm))
par(mfrow=c(1,2))
barplot(has_finance$distinct_sales, names.arg = has_finance$viewed_distance_banding, col = 'steelblue', main = "Add Ons", xlab = "Distinct Sales",cex.names=0.7, horiz = TRUE, las=1)
barplot(no_finance$distinct_sales, names.arg = no_finance$viewed_distance_banding, col = 'steelblue', main = "No Add Ons", xlab = "Distinct Sales", cex.names=0.7, horiz = TRUE, las=1)

| How does distance between buyer & seller vary by sold vehicle type (e.g. standard vs. premium)? |
premium_standard_matches <- vrm_agg %>%
filter(type_of_stock %in% c('standard', 'premium')) %>%
group_by(sold_month, viewed_distance_banding, type_of_stock) %>%
summarise(me_vrm_matches = sum(me_vrm_match_flag, na.rm= TRUE),
site_matches = sum(site_match_flag, na.rm=TRUE),
group_matches = sum(group_match_flag, na.rm = TRUE),
no_matches = sum(no_match_flag, na.rm = TRUE))
# melt to in right format for plotting
premium_standard_matches_m <- melt(premium_standard_matches[,c('sold_month','viewed_distance_banding', 'type_of_stock', 'no_matches', 'group_matches','site_matches', 'me_vrm_matches')],id.vars = c(1,2,3))
# create barplot
ggplot(premium_standard_matches_m,aes(x = sold_month,y = value)) +
geom_bar(aes(fill = viewed_distance_banding),stat = "identity",position = "fill") + theme_minimal() + scale_fill_brewer() + labs(title = "Distance Banding") + labs(x = expression("Month"), y = "Influence")+ facet_grid(cols = vars(type_of_stock))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

| How does influence rate between buyer & seller vary by sold vehicle type (e.g. standard vs. premium)? |
ggplot(premium_standard_matches_m,aes(x = sold_month,y = value)) +
geom_bar(aes(fill = variable),stat = "identity",position = "fill") + theme_minimal() + scale_fill_brewer() + labs(title = "Autotrader Influence") + labs(x = expression("Month"), y = "Influence")+ facet_grid(cols = vars(type_of_stock))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
